# vb 9/20/2022, not sure what this was doing, maybe an old version conflict?
#path <- "/Library/Frameworks/R.framework/Versions/3.5/Resources/library"
library(tidyverse)
library(data.table)
## Warning: package 'data.table' was built under R version 4.2.3
library(reshape2)
library(glmnet)
library(ggplot2)
library(ggridges)
library(DT)
## Warning: package 'DT' was built under R version 4.2.3
library(plotly)
## Warning: package 'plotly' was built under R version 4.2.3
# find the most recent data
cv.glmnet <- "main_full_cv_glmnet_object_4Sep2025.rds"
results <- "final_forecasts_data_4Sep2025.csv"
# rename to fix soem variables
# pring when these files were created
file.info(cv.glmnet)$ctime
## [1] "2025-09-06 15:10:28 EDT"
file.info(results)$ctime
## [1] "2025-09-06 15:10:38 EDT"
# load the files
cv.glmnet <- readRDS(cv.glmnet)
results <- fread(results) %>%
mutate(
risk.2yr = risk.2025and2026.from2024, #changed to update years - NB
risk.1yr =risk.2025only.from2024 ) %>% #changed to update years - NB
dplyr::rename(country = ewp_name) %>%
#dplyr::select(-partyban.new.2) %>% # the last of the set, to be consistent with country
#rename(imr.sqrt = imr.fwd.fill.sqrt) %>%
as.data.frame()
# get the predictornames, but remove some that don't make sense to change
predictornames <- coef(cv.glmnet) %>%
rownames() %>% .[-1]
#predictornames <- predictornames[predictornames!= "partyban.new.2"]
#translate objects into John Ray speak
Xtest=results
Xtest$risk2017.2yr <- Xtest$risk.2yr
Xtest$country_name <- Xtest$country
model.out <- cv.glmnet
Xtest <- as.data.frame(Xtest) # datatable has stricter requirement
#data validation - note this is giving a warning because of small differences, so I added the code below to loosen up the requirement - NB
#stopifnot(all(Xtest$risk2017.2yr == signif(as.vector(predict(model.out, newx = as.matrix(Xtest[,predictornames]), s=model.out$lambda.min, type="response")),4)))
# Print both values for debugging
actual_values <- Xtest$risk2017.2yr
predicted_values <- signif(as.vector(predict(model.out, newx = as.matrix(Xtest[, predictornames]), s = model.out$lambda.min, type = "response")), 4)
# Check for exact matches
comparison <- data.frame(Actual = actual_values, Predicted = predicted_values)
print(comparison)
## Actual Predicted
## 1 0.104100 0.104100
## 2 0.081210 0.081140
## 3 0.080960 0.080940
## 4 0.075240 0.075180
## 5 0.074880 0.074860
## 6 0.072060 0.072000
## 7 0.061740 0.061760
## 8 0.059840 0.059800
## 9 0.055170 0.055120
## 10 0.055060 0.055010
## 11 0.052310 0.052300
## 12 0.048410 0.048400
## 13 0.048270 0.048270
## 14 0.043850 0.043810
## 15 0.042400 0.042400
## 16 0.040030 0.040000
## 17 0.039040 0.039060
## 18 0.037720 0.037700
## 19 0.035840 0.035840
## 20 0.035790 0.035800
## 21 0.033450 0.033450
## 22 0.032760 0.032770
## 23 0.031810 0.031800
## 24 0.031540 0.031550
## 25 0.030260 0.030260
## 26 0.030060 0.030070
## 27 0.027850 0.027840
## 28 0.027190 0.027180
## 29 0.026600 0.026580
## 30 0.026390 0.026390
## 31 0.026370 0.026360
## 32 0.025520 0.025530
## 33 0.025490 0.025470
## 34 0.024870 0.024880
## 35 0.024730 0.024740
## 36 0.024600 0.024600
## 37 0.024110 0.024100
## 38 0.023270 0.023260
## 39 0.023090 0.023070
## 40 0.022880 0.022860
## 41 0.022270 0.022260
## 42 0.022120 0.022110
## 43 0.021230 0.021230
## 44 0.020510 0.020510
## 45 0.020410 0.020400
## 46 0.019580 0.019570
## 47 0.019070 0.019070
## 48 0.018700 0.018710
## 49 0.018540 0.018530
## 50 0.017140 0.017140
## 51 0.016490 0.016500
## 52 0.016290 0.016290
## 53 0.015780 0.015780
## 54 0.014720 0.014720
## 55 0.014490 0.014480
## 56 0.014320 0.014310
## 57 0.014240 0.014240
## 58 0.013550 0.013540
## 59 0.012880 0.012880
## 60 0.012880 0.012870
## 61 0.012430 0.012430
## 62 0.012340 0.012330
## 63 0.012290 0.012290
## 64 0.012050 0.012040
## 65 0.011480 0.011480
## 66 0.011310 0.011300
## 67 0.010980 0.010980
## 68 0.010770 0.010760
## 69 0.010520 0.010520
## 70 0.010160 0.010150
## 71 0.010080 0.010080
## 72 0.010050 0.010060
## 73 0.010050 0.010040
## 74 0.010010 0.010000
## 75 0.009719 0.009722
## 76 0.009706 0.009704
## 77 0.009697 0.009693
## 78 0.009353 0.009345
## 79 0.009023 0.009021
## 80 0.008498 0.008491
## 81 0.008359 0.008357
## 82 0.008321 0.008323
## 83 0.008203 0.008201
## 84 0.007812 0.007805
## 85 0.007548 0.007542
## 86 0.007356 0.007358
## 87 0.007086 0.007087
## 88 0.006975 0.006977
## 89 0.006793 0.006795
## 90 0.006696 0.006698
## 91 0.006446 0.006441
## 92 0.006389 0.006385
## 93 0.006180 0.006183
## 94 0.006168 0.006163
## 95 0.005987 0.005986
## 96 0.005917 0.005917
## 97 0.005911 0.005907
## 98 0.005808 0.005808
## 99 0.005766 0.005766
## 100 0.005594 0.005590
## 101 0.005452 0.005453
## 102 0.005451 0.005446
## 103 0.005397 0.005397
## 104 0.005209 0.005205
## 105 0.005147 0.005142
## 106 0.004898 0.004898
## 107 0.004841 0.004837
## 108 0.004837 0.004832
## 109 0.004782 0.004779
## 110 0.004781 0.004782
## 111 0.004766 0.004762
## 112 0.004716 0.004713
## 113 0.004699 0.004699
## 114 0.004650 0.004651
## 115 0.004588 0.004589
## 116 0.004357 0.004356
## 117 0.004319 0.004318
## 118 0.004296 0.004296
## 119 0.004267 0.004268
## 120 0.004146 0.004144
## 121 0.004130 0.004129
## 122 0.004126 0.004124
## 123 0.004067 0.004068
## 124 0.003913 0.003911
## 125 0.003872 0.003869
## 126 0.003850 0.003850
## 127 0.003805 0.003805
## 128 0.003723 0.003724
## 129 0.003614 0.003614
## 130 0.003581 0.003580
## 131 0.003571 0.003568
## 132 0.003276 0.003275
## 133 0.003234 0.003234
## 134 0.003224 0.003223
## 135 0.003142 0.003140
## 136 0.003089 0.003087
## 137 0.003075 0.003075
## 138 0.003068 0.003069
## 139 0.003031 0.003030
## 140 0.002959 0.002959
## 141 0.002940 0.002938
## 142 0.002931 0.002930
## 143 0.002915 0.002914
## 144 0.002881 0.002881
## 145 0.002855 0.002853
## 146 0.002736 0.002736
## 147 0.002697 0.002696
## 148 0.002678 0.002676
## 149 0.002645 0.002643
## 150 0.002640 0.002637
## 151 0.002617 0.002616
## 152 0.002581 0.002580
## 153 0.002568 0.002567
## 154 0.002511 0.002509
## 155 0.002482 0.002482
## 156 0.002409 0.002408
## 157 0.002334 0.002333
## 158 0.002237 0.002237
## 159 0.002087 0.002087
## 160 0.002042 0.002041
## 161 0.002008 0.002008
## 162 0.001834 0.001833
## 163 0.001832 0.001831
## 164 0.001819 0.001819
## 165 0.001743 0.001743
## 166 0.001741 0.001741
## 167 0.001683 0.001681
## 168 0.001394 0.001395
# Use a tolerance level instead of enforcing exact match
tolerance <- 1e-3 # or another small value
all_equal <- all(abs(actual_values - predicted_values) < tolerance)
stopifnot(all_equal)
# newx: just the Xs, as a matrix
new_preds = matrix(nrow = dim(Xtest)[1], ncol = length(predictornames))
colnames(new_preds) <- predictornames
# for every IV, take the mean of the IV or mode if 0/1, and rerun the glmnet prediction with observations' values set to that IV's mean
for(i in 1:length(predictornames)){
newx = Xtest[, predictornames] %>% as.matrix()
iter_mu <- rep(NA, nrow(newx))
# if the variable has 3 or fewer unique values, set it to mode, not mean; OR if I've prespecified it
# notice that this is the mode for the current year, not for all time
if(length(table(newx[, predictornames[i]] )) <= 3 | predictornames[i] %in% c("religiousfreedom", "mk_ongoing_count_log")){
iter_mu <- rep(as.numeric(names(tail(sort(table(newx[, predictornames[i]] )), 1))), nrow(newx))
} else {
iter_mu <- rep(mean(Xtest[, predictornames[i]], na.rm = T), nrow(newx))
}
#but some variables are tied together, need to prespecify those here
# set all party ban to category 4 (when partyban.new.2 ==1)
for(j in 1:length(iter_mu)){
if(colnames(newx)[predictornames[i]] %in% c('partyban.new.0', 'partyban.new.1') & sum(newx[j, c('partyban.new.0', 'partyban.new.1')]) > 0){
iter_mu[j] <- 0
}
}
#note I changed reg.afr to reg.na - NMB
for(j in 1:length(iter_mu)){
if(colnames(newx)[predictornames[i]] %in% c('reg.na', 'reg.eap', 'reg.eur', 'reg.mna', 'reg.sca') & sum(newx[j, c('reg.na', 'reg.eap', 'reg.eur', 'reg.mna', 'reg.sca')]) > 0){
iter_mu[j] <- 0
}
}
# set all ongoing counts to 0
# which then also has implications for other variables
#note updated the name of this var - NMB
if(predictornames[i]=="mk_ongoing_count_log2"){
# set the MK variable
# this will get implemented later
# we could manually specify ithere but
# this is just how the code was set up by Jay
iter_mu <- rep(0, nrow(newx))
# set other dependent variables
# these can execute now
newx[,"widetargeting"] <- 0
newx[,"narrowtargeting"] <- 0
}
newx[, predictornames[i]] <- iter_mu
# vb 9/12/2020, adding the rounding that the model uses
new_preds[, predictornames[i]] <- signif(predict(model.out, newx = newx, s="lambda.min", type="response") ,4)
#vb, not sure what this line is doing because its not saving anywhere so commenting it out
#predict.cv.glmnet(cv.glmnet, newx = as.matrix(results[,predictornames]), s="lambda.min", type="response")
}
# New column names are the 'predictornames' values plus '_new_mean'
colnames(new_preds) <- paste0(predictornames, '_new_mean')
# Combine the new predictions to the full data
#pred_dat <- cbind(newx, new_preds)
#this is wrong, newx has polity2.fl.3 == 0 for all observations; really want X-test, vbauer
pred_dat <- cbind(Xtest[, predictornames], new_preds)
#explore what this is doing
#pred_dat <- data.frame(pred_dat)
#pred_dat$risk.2yr <- Xtest$risk2017.2yr
#pred_dat[,colnames(pred_dat)[grepl("polity|risk", colnames(pred_dat))]]
# compute the prediction shifts as 'new prediction with IVs set to their means - original prediction'
mean_shifts = matrix(nrow = dim(newx)[1], ncol = dim(newx)[2])
# do this for each IV
# someone really needs to teach John Ray about vectorization
for(i in 1:length(predictornames)){
mean_shifts[, i] <- Xtest$risk2017.2yr - new_preds[, i]
}
# vincent code, will get overwritten
region.names <- c("reg.na", "reg.eap", "reg.eur", "reg.mna", "reg.sca") #changed reg.afr to reg.na - NMB
party.names <- c('partyban.new.0', 'partyban.new.1')
mean_shifts.save <- mean_shifts
colnames(mean_shifts) <- predictornames
mean_shifts <- as.data.frame(mean_shifts)
mean_shifts$country_name <- results$country
mean_shifts$region <- unlist(apply(mean_shifts[,region.names], 1, function(x) ifelse(sum(x) == 0, 0, x[x!=0])))
mean_shifts$partyban <- unlist(apply(mean_shifts[,party.names], 1, function(x) ifelse(sum(x) == 0, 0, x[x!=0])))
mean_shifts <- mean_shifts[, !(colnames(mean_shifts) %in% c(region.names, party.names))]
saveRDS(mean_shifts, file = paste0("meanshifts_", Sys.Date(), ".rds"))
mean_shifts <- mean_shifts.save
# the sizes of the shifts from the original predictions to the new predictions with each IV changed one at a time are stored in the variables labeled predictornames plus '_diffs_with_mean_shifts'
colnames(mean_shifts) <- paste0(predictornames, '_diffs_with_mean_shifts')
# merge the new IVs to the full data
pred_dat <- cbind(pred_dat, mean_shifts) %>% data.frame()
pred_dat$risk2 <- Xtest$risk2017.2yr
pred_dat$country_name <- as.character(Xtest$country_name)
# create an index to plot the top 25 risk countries
#pred_dat <- pred_dat[rev(order(pred_dat$risk2)),]
#pred_dat$plot_order <- 1:nrow(pred_dat)
#pred_dat <- pred_dat[order(pred_dat$plot_order),]
# Put all the actual mean shifts into one region variable, then join that to the plot data
new_mean_region <- pred_dat$risk2
new_mean_region[pred_dat$reg.na == 1] <- pred_dat$reg.na_new_mean[pred_dat$reg.na == 1] #changed reg.afr to reg.na - NMB
new_mean_region[pred_dat$reg.eap == 1] <- pred_dat$reg.eap_new_mean[pred_dat$reg.eap == 1]
new_mean_region[pred_dat$reg.eur == 1] <- pred_dat$reg.eur_new_mean[pred_dat$reg.eur == 1]
new_mean_region[pred_dat$reg.mna == 1] <- pred_dat$reg.mna_new_mean[pred_dat$reg.mna == 1]
new_mean_region[pred_dat$reg.sca == 1] <- pred_dat$reg.sca_new_mean[pred_dat$reg.sca == 1]
# Put all the new predictions into one partyban variable, then join that to the plot data
new_mean_partyban <- pred_dat$risk2
new_mean_partyban[pred_dat$partyban.new.0 == 1] <- pred_dat$partyban.new.0_new_mean[pred_dat$partyban.new.0 == 1]
new_mean_partyban[pred_dat$partyban.new.1 == 1] <- pred_dat$partyban.new.1_new_mean[pred_dat$partyban.new.1 == 1]
pred_dat$new_mean_region <- new_mean_region
pred_dat$new_mean_partyban <- new_mean_partyban
# Same thing with the mean shifts
new_mean_shift_region <- rep(0, nrow(pred_dat))
new_mean_shift_region[pred_dat$reg.na == 1] <- pred_dat$reg.na_diffs_with_mean_shifts[pred_dat$reg.na == 1] #changed reg.afr to reg. na - NMB
new_mean_shift_region[pred_dat$reg.eap == 1] <- pred_dat$reg.eap_diffs_with_mean_shifts[pred_dat$reg.eap == 1]
new_mean_shift_region[pred_dat$reg.eur == 1] <- pred_dat$reg.eur_diffs_with_mean_shifts[pred_dat$reg.eur == 1]
new_mean_shift_region[pred_dat$reg.mna == 1] <- pred_dat$reg.mna_diffs_with_mean_shifts[pred_dat$reg.mna == 1]
new_mean_shift_region[pred_dat$reg.sca == 1] <- pred_dat$reg.sca_diffs_with_mean_shifts[pred_dat$reg.sca == 1]
new_mean_shift_partyban <- rep(0, nrow(pred_dat))
new_mean_shift_partyban[pred_dat$partyban.new.0 == 1] <- pred_dat$partyban.new.0_diffs_with_mean_shifts[pred_dat$partyban.new.0 == 1]
new_mean_shift_partyban[pred_dat$partyban.new.1 == 1] <- pred_dat$partyban.new.1_diffs_with_mean_shifts[pred_dat$partyban.new.1 == 1]
pred_dat$new_mean_shift_region <- new_mean_shift_region
pred_dat$new_mean_shift_partyban <- new_mean_shift_partyban
#check what's going on
#pred_dat[pred_dat$country_name=="Pakistan",colnames(pred_dat)[grepl("polity|risk|country_name", colnames(pred_dat))]]
#changed reg.afr to reg.na - NMB
mean_vars <- c(colnames(new_preds)[!(colnames(new_preds) %in% c('reg.na_new_mean','reg.eap_new_mean','reg.eur_new_mean','reg.mna_new_mean','reg.sca_new_mean',
'partyban.new.0_new_mean', 'partyban.new.1_new_mean'))], 'new_mean_region', 'new_mean_partyban')
# format the data for plotting
mean_plotdat = melt(pred_dat[, c('country_name', mean_vars)], id.vars = c('country_name'))
mean_plotdat$variable = gsub("_new_mean", "", mean_plotdat$variable)
#changed reg.afr to reg.na - NMB
shift_vars <- c(colnames(mean_shifts)[!colnames(mean_shifts) %in% c('reg.na_diffs_with_mean_shifts', 'reg.eap_diffs_with_mean_shifts', 'reg.eur_diffs_with_mean_shifts', 'reg.mna_diffs_with_mean_shifts', 'reg.sca_diffs_with_mean_shifts',
'partyban.new.0_diffs_with_mean_shifts', 'partyban.new.1_diffs_with_mean_shifts')], 'new_mean_shift_region','new_mean_shift_partyban')
shift_size_plotdat = melt(pred_dat[, c('country_name', shift_vars)], id.vars = c('country_name'))
shift_size_plotdat$variable = gsub("_diffs_with_mean_shifts", "", shift_size_plotdat$variable)
# merge the original predictions to the predicted mean shifts
mean_shift_plotdat_1 = pred_dat[, c('country_name', 'risk2') ]
mean_shift_plotdat_2 = melt(pred_dat[, c('country_name', shift_vars)], id.vars = c('country_name'))
mean_shift_plotdat = dplyr::left_join(mean_shift_plotdat_1, mean_shift_plotdat_2, by="country_name")
mean_shift_plotdat$variable = gsub("_new_mean", "", mean_shift_plotdat$variable, fixed = T)
colnames(mean_shift_plotdat) <- c('country_name','start','variable','end')
#pred_dat <- pred_dat[, !colnames(pred_dat) %in% c('includesnonstate')]
#pred_dat <- pred_dat[rank(pred_dat$risk2), ] #not the correct function to use, needs to be order, vbauer
pred_dat <- pred_dat[order(pred_dat$risk2), ] #
#data validation
stopifnot(all(table(pred_dat$country_name)==1))
write.csv(pred_dat, 'new_prediction_data_2.csv', row.names = F)
#predictornames <- predictornames[!predictornames %in% c('includesnonstate')]
Note,removing durable.ln_diffs_with_mean_shifts from colstouse ( 8/18/2020 )
# Make thing that gives top X shifters and their amount or at least sign, by country
# drop the intercept - added from Chad's code - NMB
predictornames <- predictornames[-1]
# specify the columns to use, which means getting rid of region and partyban as well as some other variables that dont make sense
#changed reg.afr to reg.na - NMB
colstouse <- predictornames %>% paste0(., "_diffs_with_mean_shifts")
colstouse[colstouse == "reg.na_diffs_with_mean_shifts"] <- "new_mean_shift_region"
colstouse[colstouse == "reg.eap_diffs_with_mean_shifts"] <- "new_mean_shift_region"
colstouse[colstouse == "reg.eur_diffs_with_mean_shifts"] <- "new_mean_shift_region"
colstouse[colstouse == "reg.mna_diffs_with_mean_shifts"] <- "new_mean_shift_region"
colstouse[colstouse == "reg.sca_diffs_with_mean_shifts"] <- "new_mean_shift_region"
colstouse[colstouse == "partyban.new.0_diffs_with_mean_shifts"] <- "new_mean_shift_partyban"
colstouse[colstouse == "partyban.new.1_diffs_with_mean_shifts"] <- "new_mean_shift_partyban"
colstouse[colstouse == "year_diffs_with_mean_shifts"] <- NA # does not make sense to shift
colstouse[colstouse == "year_sq_diffs_with_mean_shifts"] <- NA # does not make sense to shift
colstouse <- na.omit(colstouse)
colstouse <- unique(colstouse) # some are now duplicated, so remove those
colstouse <- c("country_name", "risk2", colstouse) # add some additional columns
# all variables should be in pred_data
stopifnot(length(colstouse[!(colstouse %in% colnames(pred_dat))])==0)
usefulshiftdata = pred_dat[,colstouse]
colnames(usefulshiftdata)=gsub(colnames(usefulshiftdata), pattern="_diffs_with_mean_shifts", replacement="")
usefulshiftdata = usefulshiftdata %>% arrange(-risk2)
# Create long list where first column is country name repeated P times; second column holds the sequence of top P "shifting" variables for that country, and the next column contains the amount (and sign) of the corresponding shifts.
P=5 # number of top factors to include
rowindex=1
bigshifters=matrix(NA,nrow=P*nrow(usefulshiftdata), ncol=3)
colnames(bigshifters) = c("country_name","variable","shift")
for (j in 1:nrow(usefulshiftdata)){
print(usefulshiftdata$country_name[j])
subdat=-1*usefulshiftdata[j,3:ncol(usefulshiftdata)] #-1 to change direction so +1 is higher risk.
#orderindex=suppressWarnings(order(abs(subdat), decreasing=TRUE))
orderindex = order(abs(as.numeric(subdat)), decreasing=TRUE) #changed to match Chad's code - NMB
keepers=subdat[orderindex][1:P]
bigshifters[seq(rowindex,rowindex+P-1),"country_name"]=usefulshiftdata$country_name[j]
bigshifters[seq(rowindex,rowindex+P-1),"variable"] = names(keepers)
bigshifters[seq(rowindex,rowindex+P-1),"shift"] = as.numeric(keepers)
rowindex=rowindex+P
}
## [1] "Burma/Myanmar"
## [1] "Chad"
## [1] "Sudan"
## [1] "India"
## [1] "Burkina Faso"
## [1] "Pakistan"
## [1] "Afghanistan"
## [1] "Nigeria"
## [1] "Yemen"
## [1] "Guinea"
## [1] "Niger"
## [1] "Indonesia"
## [1] "Ethiopia"
## [1] "Democratic Republic of Congo"
## [1] "Iran"
## [1] "Cambodia"
## [1] "Mali"
## [1] "Bangladesh"
## [1] "Iraq"
## [1] "Syria"
## [1] "Philippines"
## [1] "Russia"
## [1] "Somalia"
## [1] "Mozambique"
## [1] "Turkey"
## [1] "Angola"
## [1] "China"
## [1] "Laos"
## [1] "Republic of Congo"
## [1] "Tajikistan"
## [1] "South Sudan"
## [1] "Egypt"
## [1] "Thailand"
## [1] "Uganda"
## [1] "Central African Republic"
## [1] "Ivory Coast"
## [1] "Tanzania"
## [1] "Algeria"
## [1] "Eritrea"
## [1] "Burundi"
## [1] "Nepal"
## [1] "South Africa"
## [1] "Kenya"
## [1] "Cameroon"
## [1] "Sierra Leone"
## [1] "Malawi"
## [1] "Liberia"
## [1] "Sri Lanka"
## [1] "Democratic Republic of Vietnam"
## [1] "Gabon"
## [1] "Libya"
## [1] "Haiti"
## [1] "Zimbabwe"
## [1] "Colombia"
## [1] "Togo"
## [1] "Mauritania"
## [1] "Madagascar"
## [1] "Equatorial Guinea"
## [1] "Rwanda"
## [1] "Ukraine"
## [1] "Zambia"
## [1] "Morocco"
## [1] "Bahrain"
## [1] "Nicaragua"
## [1] "Uzbekistan"
## [1] "North Korea"
## [1] "Namibia"
## [1] "Senegal"
## [1] "Peru"
## [1] "Georgia"
## [1] "Jordan"
## [1] "Djibouti"
## [1] "Bosnia and Herzegovina"
## [1] "Brazil"
## [1] "Argentina"
## [1] "Lebanon"
## [1] "United States of America"
## [1] "Bhutan"
## [1] "Venezuela"
## [1] "Mexico"
## [1] "Israel"
## [1] "Botswana"
## [1] "Guinea-Bissau"
## [1] "United Arab Emirates"
## [1] "Papua New Guinea"
## [1] "El Salvador"
## [1] "Benin"
## [1] "Kazakhstan"
## [1] "Eswatini"
## [1] "Comoros"
## [1] "Guatemala"
## [1] "Azerbaijan"
## [1] "South Korea"
## [1] "Fiji"
## [1] "Canada"
## [1] "Moldova"
## [1] "Ecuador"
## [1] "Timor-Leste"
## [1] "Ghana"
## [1] "Poland"
## [1] "Bolivia"
## [1] "Romania"
## [1] "Honduras"
## [1] "Kyrgystan"
## [1] "Paraguay"
## [1] "Hungary"
## [1] "Saudi Arabia"
## [1] "United Kingdom"
## [1] "Serbia"
## [1] "Japan"
## [1] "Malaysia"
## [1] "Cyprus"
## [1] "Turkmenistan"
## [1] "Panama"
## [1] "Czech Republic"
## [1] "Maldives"
## [1] "Slovakia"
## [1] "Dominican Republic"
## [1] "Germany"
## [1] "Spain"
## [1] "France"
## [1] "Croatia"
## [1] "Chile"
## [1] "Belarus"
## [1] "Italy"
## [1] "Lesotho"
## [1] "Greece"
## [1] "The Gambia"
## [1] "Australia"
## [1] "Uruguay"
## [1] "Guyana"
## [1] "Netherlands"
## [1] "Switzerland"
## [1] "Tunisia"
## [1] "Belgium"
## [1] "Oman"
## [1] "Kuwait"
## [1] "Singapore"
## [1] "Mongolia"
## [1] "New Zealand"
## [1] "Portugal"
## [1] "Austria"
## [1] "Bulgaria"
## [1] "Qatar"
## [1] "Sweden"
## [1] "Cuba"
## [1] "Kosovo"
## [1] "Macedonia"
## [1] "Albania"
## [1] "Mauritius"
## [1] "Denmark"
## [1] "Armenia"
## [1] "Ireland"
## [1] "Solomon Islands"
## [1] "Finland"
## [1] "Norway"
## [1] "Lithuania"
## [1] "Latvia"
## [1] "Slovenia"
## [1] "Cape Verde"
## [1] "Estonia"
## [1] "Luxembourg"
## [1] "Montenegro"
## [1] "Costa Rica"
## [1] "Malta"
## [1] "Jamaica"
## [1] "Trinidad and Tobago"
## [1] "Suriname"
write.csv(bigshifters, file = paste0("bigshiftvariables_", Sys.Date(), ".csv"))
The problem with comparing differences in the average value of these variables is that variables with a larger scale (i.e. from -100 to 100) will appear to be more important than variables with a smaller scale (-1 to 1). I show the difference in the actual means and also between the standardized means (centering all variables to have mean 0 and standard deviation 1).
Variable definitions
Note to self
Among high-risk/low-risk countries. Setting the cut-off for “high risk” as 4% + predicted chance of mass killing in the next two years.
Among countries that have experienced mass killings. Setting the cut-off for “high risk” as any mass killing ever.
importances <- importances[order(-abs(importances$var_importance)),]
importances <- importances[!is.na(importances$var_importance),]
importances$varname <- factor(importances$varname, levels = rev(importances$varname))
importances$var_importance <- abs(importances$var_importance)
ggplot(importances, aes(x = var_importance, y = varname)) +
geom_point() +
labs(x = "Variable Importance", y = "Variable") +
theme_bw() +
margin
The value for partyban is 0 which is categories 0/1 in VDEM (this note was prev here - NB)
The value for region is North America (changed from africa - NB)
## Setting new_mean_shift_partyban to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: partyban.2: no parties banned
## Setting new_mean_shift_region to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: africa
## Setting even_civilrights to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 0
## Setting popsize.log2 to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 23.55
## Setting mk_ever to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 0
## Setting un_imr_sqrt to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 3.9
## Setting coup.try.5yr to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 0
## Setting pol_killing_approved to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 0
## Setting battledeaths.log2 to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 1.66
## Setting discrimpop to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 0.02
## Setting ses_power_dist to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 0.39
## Setting efindex to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 0.46
## Setting ios.iccpr1 to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 1
## Setting social_power_dist to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 1
## Setting narrowtargeting to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 0
## Setting minorityrule to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 0
## Setting newcountry to its mode (or mean if continuous)
## and change in MK prob, 2-year horizon
## set the value to: 0
## MK risk change in Burma/Myanmar
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Chad
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Sudan
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in India
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Burkina Faso
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Pakistan
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Afghanistan
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Nigeria
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Yemen
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Guinea
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Niger
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Indonesia
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Ethiopia
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Democratic Republic of Congo
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Iran
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Cambodia
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Mali
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Bangladesh
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Iraq
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Syria
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Philippines
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Russia
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Somalia
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Mozambique
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Turkey
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Angola
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in China
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Laos
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Republic of Congo
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Tajikistan
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in South Sudan
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Egypt
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Thailand
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Uganda
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Central African Republic
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Ivory Coast
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Tanzania
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Algeria
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Eritrea
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Burundi
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Nepal
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in South Africa
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Kenya
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Cameroon
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Sierra Leone
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Malawi
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Liberia
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Sri Lanka
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Democratic Republic of Vietnam
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Gabon
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Haiti
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Equatorial Guinea
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Rwanda
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in United States of America
## by shifting variables to mean/mode, 2-year horizon
## MK risk change in Venezuela
## by shifting variables to mean/mode, 2-year horizon